Scanorama (Hie et al., 2019)
GitHub
Tutorial external API
External API tutorial
A fix to run scran pooling normalization (computeSumFactors) in the current Python environment.
import scanpy as sc
import scanorama
import numpy as np
import pandas as pd
import os
# Working directory: relative paths used below resolve against this folder
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# rpy2: R_HOME is set before rpy2 is imported
# (presumably so rpy2 binds to this conda environment's R - confirm)
os.environ['R_HOME'] = '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'

# Plotting: source the project's R script and turn element 0 of the result
# into a nested dict {palette name: {entry name: first element of that entry}}
import rpy2.robjects as robjects

color_load = robjects.r.source('plotting_global.R')
color = {
    color_load[0].names[i]: {
        key: color_load[0][i].rx2(key)[0] for key in color_load[0][i].names
    }
    for i in range(len(color_load[0]))
}

sc.set_figure_params(figsize=(5, 5))
# Scanorama settings (forwarded to scanorama.integrate_scanpy)
dimred = 100
knn = 20

# Scanpy settings (forwarded to sc.pp.neighbors)
n_neighbors = 50

# Read the stored object and continue with the AnnData rebuilt from its .raw slot
adata = sc.read_h5ad('data/object/so_sct.h5ad').raw.to_adata()
def set_color(categories):
    """Order categorical obs columns by the colour palette and store their colours.

    For every name in ``categories`` that is a column of the module-level
    ``adata.obs``, the column is converted to a categorical whose category
    order follows the key order of ``color[name]`` (restricted to values that
    actually occur in the column). The matching colours are written, in the
    same order, to ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of palette names. Names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: Raised by ``reorder_categories`` when the column contains
            a value that has no entry in ``color[name]``.
    """
    # Build the lookup once instead of re-creating a list for every candidate.
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        # Drop declared-but-unused categories; otherwise reorder_categories
        # rejects the new order because it must list every existing category.
        values = pd.Series(adata.obs[category], dtype='category').cat.remove_unused_categories()
        present = set(values.cat.categories)
        # Keep the palette's own order, limited to values found in the data.
        keys = [key for key in color[category] if key in present]
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category][key] for key in keys], dtype=object)
# Apply the project palettes to every matching obs column
set_color(list(color))

# Normalise and log1p-transform the full matrix (scanpy defaults)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Integration batch label = treatment followed by sample replicate
adata.obs['integrate'] = (
    adata.obs['treatment'].astype(str) + adata.obs['sample_rep'].astype(str)
)

# One independent, separately scaled AnnData per batch, in order of first appearance
adata_sub = []
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate'] == sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub.append(adata_tmp)
# Run Scanorama; the per-batch result is read back from .obsm['X_scanorama'] below
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output (rows follow the batch order of adata_sub)
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata.
# The batches were built by grouping cells, so the concatenated rows are only
# in adata's cell order if adata happens to be sorted by batch. Look each cell
# up by name instead of relying on position; a cell missing from the batches
# raises KeyError rather than silently misaligning the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()
adata.obsm["X_scanorama"] = X_scanorama
Found 14772 genes among all datasets Processing datasets (0, 1) Processing datasets (2, 3) Processing datasets (1, 3) Processing datasets (0, 2) Processing datasets (0, 3) Processing datasets (1, 2)
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(
    adata,
    use_rep='X_scanorama',
    n_pcs=dimred,
    n_neighbors=n_neighbors,
)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot the UMAP coloured by clusterings and sample annotations
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    wspace=0.5,
    ncols=3,
)

# Next run: reload the object, fetch the gene list stored under
# uns['hvg_int_8000'], then switch to the AnnData rebuilt from .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_8000 = list(adata.uns['hvg_int_8000'])
adata = adata.raw.to_adata()
def set_color(categories):
    """Order categorical obs columns by the colour palette and store their colours.

    For every name in ``categories`` that is a column of the module-level
    ``adata.obs``, the column is converted to a categorical whose category
    order follows the key order of ``color[name]`` (restricted to values that
    actually occur in the column). The matching colours are written, in the
    same order, to ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of palette names. Names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: Raised by ``reorder_categories`` when the column contains
            a value that has no entry in ``color[name]``.
    """
    # Build the lookup once instead of re-creating a list for every candidate.
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        # Drop declared-but-unused categories; otherwise reorder_categories
        # rejects the new order because it must list every existing category.
        values = pd.Series(adata.obs[category], dtype='category').cat.remove_unused_categories()
        present = set(values.cat.categories)
        # Keep the palette's own order, limited to values found in the data.
        keys = [key for key in color[category] if key in present]
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category][key] for key in keys], dtype=object)
# Set colors
set_color(list(color.keys()))

# Normalise and log1p-transform before restricting to the gene list
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Keep only the genes in hvg_8000. Take a real copy: the slice alone is a view,
# and writing to .obs of a view makes anndata emit ImplicitModificationWarning
# while it converts the view into an actual object behind the scenes.
adata = adata[:, hvg_8000].copy()

# Integration batch label = treatment followed by sample replicate
adata.obs['integrate'] = adata.obs['treatment'].astype(str) + adata.obs['sample_rep'].astype(str)

# One independent, separately scaled AnnData per batch, in order of first appearance
adata_sub = []
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate'] == sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub.append(adata_tmp)
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
# Run Scanorama; the per-batch result is read back from .obsm['X_scanorama'] below
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output (rows follow the batch order of adata_sub)
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata.
# The batches were built by grouping cells, so the concatenated rows are only
# in adata's cell order if adata happens to be sorted by batch. Look each cell
# up by name instead of relying on position; a cell missing from the batches
# raises KeyError rather than silently misaligning the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()
adata.obsm["X_scanorama"] = X_scanorama
Found 8000 genes among all datasets Processing datasets (2, 3) Processing datasets (1, 3) Processing datasets (0, 1) Processing datasets (0, 2) Processing datasets (0, 3) Processing datasets (1, 2)
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(
    adata,
    use_rep='X_scanorama',
    n_pcs=dimred,
    n_neighbors=n_neighbors,
)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot the UMAP coloured by clusterings and sample annotations
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    wspace=0.5,
    ncols=3,
)

# Next run: reload the object, fetch the gene list stored under
# uns['hvg_int_6000'], then switch to the AnnData rebuilt from .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_6000 = list(adata.uns['hvg_int_6000'])
adata = adata.raw.to_adata()
def set_color(categories):
    """Order categorical obs columns by the colour palette and store their colours.

    For every name in ``categories`` that is a column of the module-level
    ``adata.obs``, the column is converted to a categorical whose category
    order follows the key order of ``color[name]`` (restricted to values that
    actually occur in the column). The matching colours are written, in the
    same order, to ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of palette names. Names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: Raised by ``reorder_categories`` when the column contains
            a value that has no entry in ``color[name]``.
    """
    # Build the lookup once instead of re-creating a list for every candidate.
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        # Drop declared-but-unused categories; otherwise reorder_categories
        # rejects the new order because it must list every existing category.
        values = pd.Series(adata.obs[category], dtype='category').cat.remove_unused_categories()
        present = set(values.cat.categories)
        # Keep the palette's own order, limited to values found in the data.
        keys = [key for key in color[category] if key in present]
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category][key] for key in keys], dtype=object)
# Set colors
set_color(list(color.keys()))

# Normalise and log1p-transform before restricting to the gene list
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Keep only the genes in hvg_6000. Take a real copy: the slice alone is a view,
# and writing to .obs of a view makes anndata emit ImplicitModificationWarning
# while it converts the view into an actual object behind the scenes.
adata = adata[:, hvg_6000].copy()

# Integration batch label = treatment followed by sample replicate
adata.obs['integrate'] = adata.obs['treatment'].astype(str) + adata.obs['sample_rep'].astype(str)

# One independent, separately scaled AnnData per batch, in order of first appearance
adata_sub = []
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate'] == sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub.append(adata_tmp)
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
# Run Scanorama; the per-batch result is read back from .obsm['X_scanorama'] below
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output (rows follow the batch order of adata_sub)
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata.
# The batches were built by grouping cells, so the concatenated rows are only
# in adata's cell order if adata happens to be sorted by batch. Look each cell
# up by name instead of relying on position; a cell missing from the batches
# raises KeyError rather than silently misaligning the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()
adata.obsm["X_scanorama"] = X_scanorama
Found 6000 genes among all datasets Processing datasets (2, 3) Processing datasets (1, 3) Processing datasets (0, 1) Processing datasets (0, 2) Processing datasets (0, 3) Processing datasets (1, 2)
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(
    adata,
    use_rep='X_scanorama',
    n_pcs=dimred,
    n_neighbors=n_neighbors,
)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot the UMAP coloured by clusterings and sample annotations
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    wspace=0.5,
    ncols=3,
)

# Next run: reload the object, fetch the gene list stored under
# uns['hvg_int_4000'], then switch to the AnnData rebuilt from .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_4000 = list(adata.uns['hvg_int_4000'])
adata = adata.raw.to_adata()
def set_color(categories):
    """Order categorical obs columns by the colour palette and store their colours.

    For every name in ``categories`` that is a column of the module-level
    ``adata.obs``, the column is converted to a categorical whose category
    order follows the key order of ``color[name]`` (restricted to values that
    actually occur in the column). The matching colours are written, in the
    same order, to ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of palette names. Names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: Raised by ``reorder_categories`` when the column contains
            a value that has no entry in ``color[name]``.
    """
    # Build the lookup once instead of re-creating a list for every candidate.
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        # Drop declared-but-unused categories; otherwise reorder_categories
        # rejects the new order because it must list every existing category.
        values = pd.Series(adata.obs[category], dtype='category').cat.remove_unused_categories()
        present = set(values.cat.categories)
        # Keep the palette's own order, limited to values found in the data.
        keys = [key for key in color[category] if key in present]
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category][key] for key in keys], dtype=object)
# Set colors
set_color(list(color.keys()))

# Normalise and log1p-transform before restricting to the gene list
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Keep only the genes in hvg_4000. Take a real copy: the slice alone is a view,
# and writing to .obs of a view makes anndata emit ImplicitModificationWarning
# while it converts the view into an actual object behind the scenes.
adata = adata[:, hvg_4000].copy()

# Integration batch label = treatment followed by sample replicate
adata.obs['integrate'] = adata.obs['treatment'].astype(str) + adata.obs['sample_rep'].astype(str)

# One independent, separately scaled AnnData per batch, in order of first appearance
adata_sub = []
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate'] == sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub.append(adata_tmp)
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
# Run Scanorama; the per-batch result is read back from .obsm['X_scanorama'] below
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output (rows follow the batch order of adata_sub)
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata.
# The batches were built by grouping cells, so the concatenated rows are only
# in adata's cell order if adata happens to be sorted by batch. Look each cell
# up by name instead of relying on position; a cell missing from the batches
# raises KeyError rather than silently misaligning the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()
adata.obsm["X_scanorama"] = X_scanorama
Found 4000 genes among all datasets Processing datasets (2, 3) Processing datasets (1, 3) Processing datasets (0, 1) Processing datasets (0, 2) Processing datasets (0, 3) Processing datasets (1, 2)
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(
    adata,
    use_rep='X_scanorama',
    n_pcs=dimred,
    n_neighbors=n_neighbors,
)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot the UMAP coloured by clusterings and sample annotations
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    wspace=0.5,
    ncols=3,
)

# Next run: reload the object, fetch the gene list stored under
# uns['hvg_int_2000'], then switch to the AnnData rebuilt from .raw
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_2000 = list(adata.uns['hvg_int_2000'])
adata = adata.raw.to_adata()
def set_color(categories):
    """Order categorical obs columns by the colour palette and store their colours.

    For every name in ``categories`` that is a column of the module-level
    ``adata.obs``, the column is converted to a categorical whose category
    order follows the key order of ``color[name]`` (restricted to values that
    actually occur in the column). The matching colours are written, in the
    same order, to ``adata.uns[name + '_colors']``.

    Args:
        categories: Iterable of palette names. Names that are not columns of
            ``adata.obs`` are skipped.

    Raises:
        ValueError: Raised by ``reorder_categories`` when the column contains
            a value that has no entry in ``color[name]``.
    """
    # Build the lookup once instead of re-creating a list for every candidate.
    obs_columns = set(adata.obs.columns)
    for category in categories:
        if category not in obs_columns:
            continue
        # Drop declared-but-unused categories; otherwise reorder_categories
        # rejects the new order because it must list every existing category.
        values = pd.Series(adata.obs[category], dtype='category').cat.remove_unused_categories()
        present = set(values.cat.categories)
        # Keep the palette's own order, limited to values found in the data.
        keys = [key for key in color[category] if key in present]
        adata.obs[category] = values.cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category][key] for key in keys], dtype=object)
# Set colors
set_color(list(color.keys()))

# Normalise and log1p-transform before restricting to the gene list
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Keep only the genes in hvg_2000. Take a real copy: the slice alone is a view,
# and writing to .obs of a view makes anndata emit ImplicitModificationWarning
# while it converts the view into an actual object behind the scenes.
adata = adata[:, hvg_2000].copy()

# Integration batch label = treatment followed by sample replicate
adata.obs['integrate'] = adata.obs['treatment'].astype(str) + adata.obs['sample_rep'].astype(str)

# One independent, separately scaled AnnData per batch, in order of first appearance
adata_sub = []
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate'] == sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub.append(adata_tmp)
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
# Run Scanorama; the per-batch result is read back from .obsm['X_scanorama'] below
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output (rows follow the batch order of adata_sub)
X_scanorama = np.concatenate([ad.obsm['X_scanorama'] for ad in adata_sub])
obs_names = np.concatenate([ad.obs_names for ad in adata_sub])

# Add X_scanorama integration to adata.
# The batches were built by grouping cells, so the concatenated rows are only
# in adata's cell order if adata happens to be sorted by batch. Look each cell
# up by name instead of relying on position; a cell missing from the batches
# raises KeyError rather than silently misaligning the embedding.
X_scanorama = pd.DataFrame(X_scanorama, index=obs_names).loc[adata.obs_names].to_numpy()
adata.obsm["X_scanorama"] = X_scanorama
Found 2000 genes among all datasets Processing datasets (1, 3) Processing datasets (2, 3) Processing datasets (0, 1) Processing datasets (0, 2) Processing datasets (0, 3) Processing datasets (1, 2)
# Dimensional reduction and clustering on the Scanorama embedding
sc.pp.neighbors(
    adata,
    use_rep='X_scanorama',
    n_pcs=dimred,
    n_neighbors=n_neighbors,
)
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot the UMAP coloured by clusterings and sample annotations
sc.pl.umap(
    adata,
    color=[
        'louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere',
        'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA',
    ],
    wspace=0.5,
    ncols=3,
)